From 614e4c4ebc75517295bccd29b20ddbc5b52af6fc Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 12 Nov 2015 11:00:04 +0100 Subject: perf/core: Robustify the perf_cgroup_from_task() RCU checks This patch reinforces the lockdep checks performed by perf_cgroup_from_tsk() by passing the perf_event_context whenever possible. It is okay to not hold the RCU read lock when we know we hold the ctx->lock. This patch makes sure this property holds. In some functions, such as perf_cgroup_sched_in(), we do not pass the context because we are sure we are holding the RCU read lock. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: edumazet@google.com Link: http://lkml.kernel.org/r/1447322404-10920-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 377e8f8ed391..a316ca96f1b6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -298,7 +298,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b) static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event) { if (event->attach_state & PERF_ATTACH_TASK) - return perf_cgroup_from_task(event->hw.target); + return perf_cgroup_from_task(event->hw.target, event->ctx); return event->cgrp; } -- cgit From b28ae9560b693bcd2e9f4d6d9c415d5380b7c3c5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Oct 2015 11:46:33 -0700 Subject: perf/x86: Fix LBR call stack save/restore This fixes a bug I added in the following commit: 90405aa02247 ("perf/x86/intel/lbr: Limit LBR accesses to TOS in callstack mode") The bug could lead to lost LBR call stacks. When restoring the LBR state we need to use the TOS of the previous context, not the current context. To do that we need to save/restore the TOS. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1445366797-30894-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel_lbr.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 499f533dd3cc..ffa7a92025e1 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -627,6 +627,7 @@ struct x86_perf_task_context { u64 lbr_from[MAX_LBR_ENTRIES]; u64 lbr_to[MAX_LBR_ENTRIES]; u64 lbr_info[MAX_LBR_ENTRIES]; + int tos; int lbr_callstack_users; int lbr_stack_state; }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index bfd0b717e944..659f01e165d5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -239,7 +239,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) } mask = x86_pmu.lbr_nr - 1; - tos = intel_pmu_lbr_tos(); + tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); @@ -247,6 +247,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } + wrmsrl(x86_pmu.lbr_tos, tos); task_ctx->lbr_stack_state = LBR_NONE; } @@ -270,6 +271,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } + task_ctx->tos = tos; task_ctx->lbr_stack_state = LBR_VALID; } -- cgit From 90eec103b96e30401c0b846045bf8a1c7159b6da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2015 11:08:45 +0100 Subject: treewide: Remove old email address There were still a number of references to my old Red Hat email address in the kernel source. Remove these while keeping the Red Hat copyright notices intact. Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event.h | 2 +- arch/x86/kernel/irq_work.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4562cf070c27..2bf79d7c97df 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -5,7 +5,7 @@ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra * Copyright (C) 2009 Intel Corporation, * Copyright (C) 2009 Google, Inc., Stephane Eranian * diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ffa7a92025e1..ab18b8a91583 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -5,7 +5,7 @@ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra * Copyright (C) 2009 Intel Corporation, * Copyright (C) 2009 Google, Inc., Stephane Eranian * diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index dc5fa6a1e8d6..3512ba607361 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -1,7 +1,7 @@ /* * x86 specific code for irq_work * - * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra */ #include -- cgit From de0afc9bdeeadaa998797d2333c754bf9f4d5dcf Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 2 Dec 2015 12:10:48 -0500 Subject: xen: Resume PMU from non-atomic context Resuming PMU currently triggers a warning from ___might_sleep() (assuming CONFIG_DEBUG_ATOMIC_SLEEP is set) when xen_pmu_init() allocates GFP_KERNEL page because we are in state resembling atomic context. Move resuming PMU to xen_arch_resume() which is called in regular context. For symmetry move suspending PMU to xen_arch_suspend() as well. Signed-off-by: Boris Ostrovsky Reported-by: Konrad Rzeszutek Wilk Cc: # 4.3 Signed-off-by: David Vrabel --- arch/x86/xen/suspend.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index feddabdab448..3705eabd7e22 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -68,26 +68,16 @@ static void xen_pv_post_suspend(int suspend_cancelled) void xen_arch_pre_suspend(void) { - int cpu; - - for_each_online_cpu(cpu) - xen_pmu_finish(cpu); - if (xen_pv_domain()) xen_pv_pre_suspend(); } void xen_arch_post_suspend(int cancelled) { - int cpu; - if (xen_pv_domain()) xen_pv_post_suspend(cancelled); else xen_hvm_post_suspend(cancelled); - - for_each_online_cpu(cpu) - xen_pmu_init(cpu); } static void xen_vcpu_notify_restore(void *data) @@ -106,10 +96,20 @@ static void xen_vcpu_notify_suspend(void *data) void xen_arch_resume(void) { + int cpu; + on_each_cpu(xen_vcpu_notify_restore, NULL, 1); + + for_each_online_cpu(cpu) + xen_pmu_init(cpu); } void xen_arch_suspend(void) { + int cpu; + + for_each_online_cpu(cpu) + xen_pmu_finish(cpu); + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); } -- cgit From e0fbac1cd49c5ca6cb6a0aa918d0943ce95d906c Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Tue, 24 Nov 2015 12:05:01 -0500 Subject: perf/x86/intel: Make L1D_PEND_MISS.FB_FULL not constrained on Haswell There was a mistake in the Haswell constraints table. Signed-off-by: Yuanfang Chen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1448384701-9110-1-git-send-email-cheny@udel.edu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f63360be2238..e2a430021e46 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -232,7 +232,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ - INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */ + INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ -- cgit From 169b932a15318e8e9f2f0f12eeb55dda09c8737f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Nov 2015 10:24:31 +0100 Subject: perf/x86/intel: Fix INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA macro We need to add rest of the flags to the constraint mask instead of another INTEL_ARCH_EVENT_MASK, fixing a typo. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1447061071-28085-1-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ab18b8a91583..d0e35ebb2adb 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -387,7 +387,7 @@ struct cpu_hw_events { /* Check flags and event code/umask, and set the HSW N/A flag */ #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ __EVENT_CONSTRAINT(code, n, \ - INTEL_ARCH_EVENT_MASK|INTEL_ARCH_EVENT_MASK, \ + INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW) -- cgit From 8090bfd2bb9abc6293f5cedef8ec9be84a913d2f Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 29 Nov 2015 21:13:50 +0100 Subject: um: Fix fpstate handling The x86 FPU cleanup changed fpstate to a plain integer. UML on x86 has to deal with that too. Signed-off-by: Richard Weinberger --- arch/x86/um/signal.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 06934a8a4872..e5f854ce2d72 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -211,7 +211,7 @@ static int copy_sc_from_user(struct pt_regs *regs, if (err) return 1; - err = convert_fxsr_from_user(&fpx, sc.fpstate); + err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate); if (err) return 1; @@ -227,7 +227,7 @@ static int copy_sc_from_user(struct pt_regs *regs, { struct user_i387_struct fp; - err = copy_from_user(&fp, sc.fpstate, + err = copy_from_user(&fp, (void *)sc.fpstate, sizeof(struct user_i387_struct)); if (err) return 1; @@ -291,7 +291,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, #endif #undef PUTREG sc.oldmask = mask; - sc.fpstate = to_fp; + sc.fpstate = (unsigned long)to_fp; err = copy_to_user(to, &sc, sizeof(struct sigcontext)); if (err) @@ -468,12 +468,10 @@ long sys_sigreturn(void) struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); sigset_t set; struct sigcontext __user *sc = &frame->sc; - unsigned long __user *oldmask = &sc->oldmask; - unsigned long __user *extramask = frame->extramask; int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || - copy_from_user(&set.sig[1], extramask, sig_size)) + if (copy_from_user(&set.sig[0], (void *)sc->oldmask, sizeof(set.sig[0])) || + copy_from_user(&set.sig[1], frame->extramask, sig_size)) goto segfault; set_current_blocked(&set); @@ -505,6 +503,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, { struct rt_sigframe __user *frame; int err = 0, sig = ksig->sig; + unsigned long fp_to; frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16); @@ -526,7 +525,10 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs)); err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); - err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); + + fp_to = (unsigned long)&frame->fpstate; + + err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate); if (sizeof(*set) == 16) { err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); -- cgit From 8b89fe1f6c430589122542f228a802d34995bebd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Dec 2015 18:37:32 +0100 Subject: kvm: x86: move tracepoints outside extended quiescent state Invoking tracepoints within kvm_guest_enter/kvm_guest_exit causes a lockdep splat. Reported-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 3 ++- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 83a1c643f9a5..899c40f826dd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3422,6 +3422,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; + trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); + if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) @@ -3892,8 +3894,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; - trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM); - if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_handle_nmi(&svm->vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af823a388c19..6b5605607849 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8042,6 +8042,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; + trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); + /* * Flush logged GPAs PML buffer, this will make dirty_bitmap more * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before @@ -8668,7 +8670,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); - trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); /* * the KVM_REQ_EVENT optimization bit is only on for one entry, and if diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eed32283d22c..b84ba4b17757 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6515,6 +6515,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); + trace_kvm_entry(vcpu->vcpu_id); + wait_lapic_expire(vcpu); __kvm_guest_enter(); if (unlikely(vcpu->arch.switch_db_regs)) { @@ -6527,8 +6529,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } - trace_kvm_entry(vcpu->vcpu_id); - wait_lapic_expire(vcpu); kvm_x86_ops->run(vcpu); /* -- cgit From 20f36e0380a7e871a711d5e4e59d04d4948326b4 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sat, 12 Dec 2015 19:25:55 -0500 Subject: xen/x86/pvh: Use HVM's flush_tlb_others op Using MMUEXT_TLB_FLUSH_MULTI doesn't buy us much since the hypervisor will likely perform same IPIs as would have the guest. More importantly, using MMUEXT_INVLPG_MULTI may not to invalidate the guest's address on remote CPU (when, for example, VCPU from another guest is running there). Signed-off-by: Boris Ostrovsky Suggested-by: Jan Beulich Signed-off-by: David Vrabel --- arch/x86/xen/mmu.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ac161db63388..cb5e266a8bf7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2495,14 +2495,9 @@ void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; - /* Optimization - we can use the HVM one but it has no idea which - * VCPUs are descheduled - which means that it will needlessly IPI - * them. Xen knows so let it do the job. - */ - if (xen_feature(XENFEAT_auto_translated_physmap)) { - pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; + if (xen_feature(XENFEAT_auto_translated_physmap)) return; - } + pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); -- cgit From 81b1b9ca6d5ca5f3ce91c0095402def657cf5db3 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Mon, 14 Dec 2015 23:13:38 +0800 Subject: KVM: VMX: Fix host initiated access to guest MSR_TSC_AUX The current handling of accesses to guest MSR_TSC_AUX returns error if vcpu does not support rdtscp, though those accesses are initiated by host. This can result in the reboot failure of some versions of QEMU. This patch fixes this issue by passing those host initiated accesses for further handling instead. Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6b5605607849..44976a596fa6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2803,7 +2803,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.ia32_xss; break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu)) + if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) return 1; /* Otherwise falls through */ default: @@ -2909,7 +2909,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) clear_atomic_switch_msr(vmx, MSR_IA32_XSS); break; case MSR_TSC_AUX: - if (!guest_cpuid_has_rdtscp(vcpu)) + if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) return 1; /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) -- cgit From 173ae9ba63349194ae180622f05fb49f55a4df46 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 15 Dec 2015 10:15:57 -0800 Subject: Fix user-visible spelling error Pavel Machek reports a warning about W+X pages found in the "Persisent" kmap area. After grepping for it (using the correct spelling), and not finding it, I noticed how the debug printk was just misspelled. Fix it. The actual mapping bug that Pavel reported is still open. It's apparently a separate issue from the known EFI page tables, looks like it's related to the HIGHMEM mappings. Signed-off-by: Linus Torvalds --- arch/x86/mm/dump_pagetables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index a035c2aa7801..0f1c6fc3ddd8 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -89,7 +89,7 @@ static struct addr_marker address_markers[] = { { 0/* VMALLOC_START */, "vmalloc() Area" }, { 0/*VMALLOC_END*/, "vmalloc() End" }, # ifdef CONFIG_HIGHMEM - { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, + { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, # endif { 0/*FIXADDR_START*/, "Fixmap Area" }, #endif -- cgit From a7f2d7865720ff13d5b0f2a3bb1fd80dc3d7a73f Mon Sep 17 00:00:00 2001 From: Alexis Dambricourt Date: Mon, 14 Dec 2015 15:39:34 +0100 Subject: KVM: MTRR: fix fixed MTRR segment look up This fixes the slow-down of VM running with pci-passthrough, since some MTRR range changed from MTRR_TYPE_WRBACK to MTRR_TYPE_UNCACHABLE. Memory in the 0K-640K range was incorrectly treated as uncacheable. Fixes: f7bfb57b3e89ff89c0da9f93dedab89f68d6ca27 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107561 Cc: qemu-stable@nongnu.org Signed-off-by: Alexis Dambricourt [Use correct BZ for "Fixes" annotation. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mtrr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 9e8bf13572e6..adc54e1d40a9 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -267,7 +267,7 @@ static int fixed_mtrr_addr_to_seg(u64 addr) for (seg = 0; seg < seg_num; seg++) { mtrr_seg = &fixed_seg_table[seg]; - if (mtrr_seg->start >= addr && addr < mtrr_seg->end) + if (mtrr_seg->start <= addr && addr < mtrr_seg->end) return seg; } -- cgit From fa7c4ebd5ae0c22f9908436303106a9ffcf0cf42 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Dec 2015 16:57:31 +0100 Subject: KVM: MTRR: observe maxphyaddr from guest CPUID, not host Conversion of MTRRs to ranges used the maxphyaddr from the boot CPU. This is wrong, because var_mtrr_range's mask variable then is discontiguous (like FF00FFFF000, where the first run of 0s corresponds to the bits between host and guest maxphyaddr). Instead always set up the masks to be full 64-bit values---we know that the reserved bits at the top are zero, and we can restore them when reading the MSR. This way var_mtrr_range gets a mask that just works. Fixes: a13842dc668b40daef4327294a6d3bdc8bd30276 Cc: qemu-stable@nongnu.org Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107561 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mtrr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index adc54e1d40a9..7747b6d716fa 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -300,7 +300,6 @@ static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end) *start = range->base & PAGE_MASK; mask = range->mask & PAGE_MASK; - mask |= ~0ULL << boot_cpu_data.x86_phys_bits; /* This cannot overflow because writing to the reserved bits of * variable MTRRs causes a #GP. @@ -356,10 +355,14 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (var_mtrr_range_is_valid(cur)) list_del(&mtrr_state->var_ranges[index].node); + /* Extend the mask with all 1 bits to the left, since those + * bits must implicitly be 0. The bits are then cleared + * when reading them. + */ if (!is_mtrr_mask) cur->base = data; else - cur->mask = data; + cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu)); /* add it to the list if it's enabled. */ if (var_mtrr_range_is_valid(cur)) { @@ -426,6 +429,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) *pdata = vcpu->arch.mtrr_state.var_ranges[index].base; else *pdata = vcpu->arch.mtrr_state.var_ranges[index].mask; + + *pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1; } return 0; -- cgit From e24dea2afc6a0852983dc741072d8e96155e13f5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 22 Dec 2015 15:20:00 +0100 Subject: KVM: MTRR: treat memory as writeback if MTRR is disabled in guest CPUID Virtual machines can be run with CPUID such that there are no MTRRs. In that case, the firmware will never enable MTRRs and it is obviously undesirable to run the guest entirely with UC memory. Check out guest CPUID, and use WB memory if MTRR do not exist. Cc: qemu-stable@nongnu.org Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107561 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/mtrr.c | 14 +++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 06332cb7e7d1..3f5c48ddba45 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -38,6 +38,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) return best && (best->ecx & bit(X86_FEATURE_XSAVE)); } +static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 1, 0); + return best && (best->edx & bit(X86_FEATURE_MTRR)); +} + static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 7747b6d716fa..3f8c732117ec 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -120,14 +120,22 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state) return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK; } -static u8 mtrr_disabled_type(void) +static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) { /* * Intel SDM 11.11.2.2: all MTRRs are disabled when * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC * memory type is applied to all of physical memory. + * + * However, virtual machines can be run with CPUID such that + * there are no MTRRs. In that case, the firmware will never + * enable MTRRs and it is obviously undesirable to run the + * guest entirely with UC memory and we use WB. */ - return MTRR_TYPE_UNCACHABLE; + if (guest_cpuid_has_mtrr(vcpu)) + return MTRR_TYPE_UNCACHABLE; + else + return MTRR_TYPE_WRBACK; } /* @@ -675,7 +683,7 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) } if (iter.mtrr_disabled) - return mtrr_disabled_type(); + return mtrr_disabled_type(vcpu); /* not contained in any MTRRs. */ if (type == -1) -- cgit From 0185604c2d82c560dab2f2933a18f797e74ab5a8 Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Wed, 18 Nov 2015 14:50:23 -0800 Subject: KVM: x86: Reload pit counters for all channels when restoring state Currently if userspace restores the pit counters with a count of 0 on channels 1 or 2 and the guest attempts to read the count on those channels, then KVM will perform a mod of 0 and crash. This will ensure that 0 values are converted to 65536 as per the spec. This is CVE-2015-7513. Signed-off-by: Andy Honig Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b84ba4b17757..7ffc224bbe41 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3572,9 +3572,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) { + int i; mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); - kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); + for (i = 0; i < 3; i++) + kvm_pit_load_count(kvm, i, ps->channels[i].count, 0); mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -3593,6 +3595,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) { int start = 0; + int i; u32 prev_legacy, cur_legacy; mutex_lock(&kvm->arch.vpit->pit_state.lock); prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; @@ -3602,7 +3605,8 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, sizeof(kvm->arch.vpit->pit_state.channels)); kvm->arch.vpit->pit_state.flags = ps->flags; - kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); + for (i = 0; i < 3; i++) + kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, start); mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } -- cgit From de3793796f78e293cc0873744a75588c99ed2fdd Mon Sep 17 00:00:00 2001 From: Mickaël Salaün Date: Tue, 22 Dec 2015 21:44:01 +0100 Subject: um: Fix pointer cast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a pointer cast typo introduced in v4.4-rc5 especially visible for the i386 subarchitecture where it results in a kernel crash. [ Also removed pointless cast as per Al Viro - Linus ] Fixes: 8090bfd2bb9a ("um: Fix fpstate handling") Signed-off-by: Mickaël Salaün Cc: Jeff Dike Acked-by: Richard Weinberger Signed-off-by: Linus Torvalds --- arch/x86/um/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index e5f854ce2d72..14fcd01ed992 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -470,7 +470,7 @@ long sys_sigreturn(void) struct sigcontext __user *sc = &frame->sc; int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - if (copy_from_user(&set.sig[0], (void *)sc->oldmask, sizeof(set.sig[0])) || + if (copy_from_user(&set.sig[0], &sc->oldmask, sizeof(set.sig[0])) || copy_from_user(&set.sig[1], frame->extramask, sig_size)) goto segfault; -- cgit